################################################################################ 
################################################################################ 
#################### Meta-analysis on the Contact Hypothesis ###################
################################################################################ 
################################################################################ 

# This script performs the analysis for each sub samples
# for the project 
# Meta-analysis on the Contact Hypothesis
# by Gwen-Jiro Clochard


################################################################################ 
################################## Import data #################################
################################################################################ 


all_measures <- 
  read_excel(paste(pathdata, "Clean//All_measures.xlsx", sep="//"))

indiv <- 
  read_excel(paste(pathdata, "Clean//Individual_papers.xlsx", sep="//"))


################################################################################ 
################################## Meta setup ##################################
################################################################################ 

pt_bright <- pt_scheme_qualitative("bright")
pt_hc     <- pt_scheme_qualitative("high contrast")
pt_mc     <- pt_scheme_qualitative("medium contrast")
jp_col    <- jp_scheme()
uo_blue   <- c("#1B1172")


################################################################################ 
################################## Sample size #################################
################################################################################ 


##### Forest plot by sample size #####

### Settings
sample_levels <- c("[0,50]", 
                   "[51, 100]", 
                   "[101, 500]", 
                   "[501+]")

sample_colors <- 
  c("[0,50]" = "#1b9e77", 
    "[51, 100]" = "#7570b3", 
    "[101, 500]" = "#d95f02", 
    "[501+]" = "#e7298a")

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_sample <- function(data, sample_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(sample == sample_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = sample_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = sample_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = sample_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = sample_colors
    ) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(sample_levels, function(y) {
  make_forest_by_sample(all_measures, y)
})
names(results_raw) <- sample_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_sample(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  sample_category = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$sample <- factor(
  summary_df$sample,
  levels = sample_levels
)

p_summary <- ggplot(summary_df,
                    aes(x = beta,
                        y = sample,
                        color = sample)) +
  geom_vline(xintercept = 0, color = "grey70") +
  geom_point(size = 3) +
  geom_errorbarh(aes(xmin = ci_l, xmax = ci_u),
                 height = 0.2) +
  scale_color_manual(
    values = sample_colors
  ) +
  ylab("") +
  xlab("Effect size") +
  ggtitle("Summary Effects by Sample Size") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_sample.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per sample size category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- sort(sample_levels)

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(sample == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


meta_table <- meta_table %>%
  select(Statistic, all_of(sample_levels))

## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_sample.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with year #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ N,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_sample.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
########################### Publication year category ##########################
################################################################################ 


##### Forest plot by age category #####

### Settings

year_levels <- c("<= 2000", 
                 "2001-2010", 
                 "2011-2020", 
                 "2021-2026")
year_colors <- c(
  "<= 2000"   = "#1b9e77",
  "2001-2010" = "#d95f02",
  "2011-2020" = "#7570b3",
  "2021-2026" = "#e7298a"
)

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_year <- function(data, year_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(year_category == year_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model

  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data

  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits

  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot

  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = year_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = year_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = year_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = c(
      "<= 2000"   = rgb(0, 48, 73, maxColorValue = 255),
      "2001-2010" = rgb(10, 147, 150, maxColorValue = 255),
      "2011-2020" = rgb(247, 127, 0, maxColorValue = 255),
      "2021-2026" = rgb(234, 226, 183, maxColorValue = 255)
    )) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(year_levels, function(y) {
  make_forest_by_year(all_measures, y)
})
names(results_raw) <- year_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_year(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  year_category = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$year_category <- factor(
  summary_df$year_category,
  levels = c("2021-2026", 
             "2011-2020", 
             "2001-2010", 
             "<= 2000")
)

p_summary <- ggplot(summary_df,
                    aes(x = beta,
                        y = year_category,
                        color = year_category)) +
  geom_vline(xintercept = 0, color = "grey70") +
  geom_point(size = 3) +
  geom_errorbarh(aes(xmin = ci_l, xmax = ci_u),
                 height = 0.2) +
  scale_color_manual(
    values = c(
      "<= 2000"   = rgb(0, 48, 73, maxColorValue = 255),
      "2001-2010" = rgb(10, 147, 150, maxColorValue = 255),
      "2011-2020" = rgb(247, 127, 0, maxColorValue = 255),
      "2021-2026" = rgb(234, 226, 183, maxColorValue = 255)
    )
  ) +
  ylab("") +
  xlab("Effect size") +
  ggtitle("Summary Effects by Publication Year") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_year_category.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per year category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- sort(unique(dat$year_category))

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(year_category == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_year.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with year #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Year,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_year.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
############################## Publication status ##############################
################################################################################ 


##### Forest plot by publication status #####

### Settings

published_levels <- c(0, 1)

published_labels <- c(
  "0" = "Unpublished",
  "1" = "Published"
)

published_colors <- c(
  "0" = rgb(249, 65, 68, maxColorValue = 255),
  "1" = rgb(87, 117, 144, maxColorValue = 255)
)

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_published <- function(data, published_value, global_xlim = NULL) {
  
  dat <- data %>%
    filter(Published == published_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  dat$Published <- factor(dat$Published)
  group_color <- published_colors[as.character(published_value)]
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(color = group_color) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u),
                   height = 0.3,
                   color = group_color) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u),
                   height = 0.6,
                   alpha = 0.3,
                   color = group_color) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_xlim) +
    scale_color_manual(values = published_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(published_levels, function(p) {
  make_forest_by_published(all_measures, p)
})
names(results_raw) <- published_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))

global_xlim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(p) {
  make_forest_by_published(all_measures, as.numeric(p), global_xlim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  Published = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$Published <- factor(
  summary_df$Published,
  levels = c("1", "0")
)

p_summary <- ggplot(summary_df,
                    aes(x = Published, y = beta, color = Published)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  ylab("Effect size") +
  xlab("") +
  scale_x_discrete(labels = published_labels) +
  scale_color_manual(values = published_colors) +
  ggtitle("Summary Effects by Publication Status") +
  theme_classic() +
  theme(
    legend.position = "none",   # remove legend if not needed
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_published.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per publication status #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

dat$Published <- factor(dat$Published,
                        levels = c(0,1),
                        labels = c("Unpublished","Published"))


## Multi-level model per category

categories <- levels(dat$Published)

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(Published == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[as.character(cat)]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_published.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with publication status #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Published,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_published.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)



################################################################################ 
############################## Registration status #############################
################################################################################ 


##### Forest plot by registration status #####

### Settings

registration_levels <- c(0, 1)

registration_labels <- c(
  "0" = "Un-registered",
  "1" = "Registered"
)

registration_colors <- c(
  "0" = rgb(249, 65, 68, maxColorValue = 255),
  "1" = rgb(87, 117, 144, maxColorValue = 255)
)

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_registration <- function(data, registration_value, global_xlim = NULL) {
  
  dat <- data %>%
    filter(registration == registration_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  dat$registration <- factor(dat$registration)
  group_color <- registration_colors[as.character(registration_value)]
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(color = group_color) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u),
                   height = 0.3,
                   color = group_color) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u),
                   height = 0.6,
                   alpha = 0.3,
                   color = group_color) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_xlim) +
    scale_color_manual(values = registration_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(registration_levels, function(p) {
  make_forest_by_registration(all_measures, p)
})
names(results_raw) <- registration_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))

global_xlim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(p) {
  make_forest_by_registration(all_measures, as.numeric(p), global_xlim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  registration = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$registration <- factor(
  summary_df$registration,
  levels = c("1", "0")
)

p_summary <- ggplot(summary_df,
                    aes(x = registration, y = beta, color = registration)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  ylab("Effect size") +
  xlab("") +
  scale_x_discrete(labels = registration_labels) +
  scale_color_manual(values = registration_colors) +
  ggtitle("Summary Effects by Registration Status") +
  theme_classic() +
  theme(
    legend.position = "none",   # remove legend if not needed
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_registration.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per publication status #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

dat$registration <- factor(dat$registration,
                        levels = c(0,1),
                        labels = c("Un-registered","Registered"))


## Multi-level model per category

categories <- levels(dat$registration)

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(registration == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[as.character(cat)]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_registration.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with publication status #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ registration,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_registration.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
################################## Average age #################################
################################################################################ 


##### Forest plot by age category #####

### Settings

table(all_measures$age_category)

age_levels <- c("0-17", 
                 "18-25", 
                 "26+")

age_colors <- c(
  "0-17" = rgb(0, 48, 73, maxColorValue = 255),
  "18-25" = rgb(247, 127, 0, maxColorValue = 255),
  "26+" = rgb(234, 228, 183, maxColorValue = 255)
)

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_age <- function(data, age_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(age_category == age_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = age_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = age_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = age_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = age_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(age_levels, function(y) {
  make_forest_by_age(all_measures, y)
})
names(results_raw) <- age_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_age(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  age_category = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$age_category <- factor(
  summary_df$age_category,
  levels = c("26+", "18-25", "0-17")
)

p_summary <- ggplot(summary_df,
                    aes(x = age_category, y = beta, color = age_category)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = age_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by Average age") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_age.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per age category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- sort(unique(dat$age_category))

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(age_category == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_age.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with age #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_age <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Mean_age + I(Mean_age^2),
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_age <- coef_test(
  fit_age,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_age, rob_age) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_age.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)
  



################################################################################ 
################################ Population type ###############################
################################################################################ 


##### Forest plot by population category #####

### Settings

table(all_measures$Population)

population_levels <- c("1. University students and Young adults", 
                "2. Children and adolescents", 
                "3. Army recruits", 
                "4. Specific adults", 
                "5. General adults")

population_colors <- 
  c("1. University students and Young adults" = rgb(0, 18, 25, maxColorValue = 255), 
    "2. Children and adolescents" = rgb(0, 95, 115, maxColorValue = 255), 
    "3. Army recruits" = rgb(10, 147, 150, maxColorValue = 255), 
    "4. Specific adults" = rgb(148, 210, 189, maxColorValue = 255), 
    "5. General adults" = rgb(233, 216, 166, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_population <- function(data, population_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(Population == population_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = population_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = population_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = population_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = population_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(population_levels, function(y) {
  make_forest_by_population(all_measures, y)
})
names(results_raw) <- population_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_population(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  Population = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$Population <- factor(
  summary_df$Population,
  levels = c("5. General adults", 
             "4. Specific adults", 
             "3. Army recruits", 
             "2. Children and adolescents", 
             "1. University students and Young adults")
)

p_summary <- ggplot(summary_df,
                    aes(x = Population, y = beta, color = Population)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = population_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by Population") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_population.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per population category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- sort(unique(dat$Population))

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(Population == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_population.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with population #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_population <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Population,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_population <- coef_test(
  fit_population,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_population, rob_population) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_population.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)




################################################################################ 
#################################### Region ####################################
################################################################################ 


##### Forest plot by region #####

### Settings

table(all_measures$zone)

region_levels <- c("North America", 
                   "Europe", 
                   "East Asia Oceania", 
                   "South Asia", 
                   "Middle East North Africa", 
                   "Sub-Saharan Africa")

region_colors <- 
  c("North America" = rgb(0, 18, 25, maxColorValue = 255), 
    "Europe" = rgb(0, 95, 115, maxColorValue = 255), 
    "East Asia Oceania" = rgb(148, 210, 189, maxColorValue = 255), 
    "South Asia" = rgb(233, 216, 166, maxColorValue = 255), 
    "Middle East North Africa" = rgb(238, 155, 0, maxColorValue = 255), 
    "Sub-Saharan Africa" = rgb(202, 103, 2, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_region <- function(data, region_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(zone == region_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = region_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = region_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = region_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = region_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(region_levels, function(y) {
  make_forest_by_region(all_measures, y)
})
names(results_raw) <- region_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_region(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  zone = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$zone <- factor(
  summary_df$zone,
  levels = c("Sub-Saharan Africa", 
             "Middle East North Africa", 
             "South Asia",
             "East Asia Oceania", 
             "Europe", 
             "North America")
)


p_summary <- ggplot(summary_df,
                    aes(x = zone, y = beta, color = zone)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = region_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by Region") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_region.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per region category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- region_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(zone == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_region.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with region #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_region <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ zone,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_region <- coef_test(
  fit_region,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_region, rob_region) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_region.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
################################### Prejudice ##################################
################################################################################ 


##### Forest plot by prejudice #####

### Settings

table(all_measures$Prejudice)

prejudice_levels <- c("1. Race Ethnicity Caste", 
                      "2. Religion", 
                      "3. Immigrants", 
                      "4. Gender", 
                      "5. LGBTQ+", 
                      "6. Age",
                      "7. Disabilities",
                      "8. Politics",
                      "9. Others")

prejudice_colors <- 
  c("1. Race Ethnicity Caste" = rgb(0, 18, 25, maxColorValue = 255), 
    "2. Religion" = rgb(0, 95, 115, maxColorValue = 255), 
    "3. Immigrants" = rgb(10, 147, 150, maxColorValue = 255), 
    "4. Gender" = rgb(148, 210, 189, maxColorValue = 255), 
    "5. LGBTQ+" = rgb(233, 216, 166, maxColorValue = 255), 
    "6. Age" = rgb(238, 155, 0, maxColorValue = 255),
    "7. Disabilities" = rgb(202, 103, 2, maxColorValue = 255),
    "8. Politics" = rgb(187, 62, 3, maxColorValue = 255),
    "9. Others" = rgb(240, 100, 25, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_prejudice <- function(data, prejudice_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(Prejudice == prejudice_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = prejudice_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = prejudice_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = prejudice_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = prejudice_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(prejudice_levels, function(y) {
  make_forest_by_prejudice(all_measures, y)
})
names(results_raw) <- prejudice_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_prejudice(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  Prejudice = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$Prejudice <- factor(
  summary_df$Prejudice,
  levels = c("9. Others", 
             "8. Politics",
             "7. Disabilities",
             "6. Age",
             "5. LGBTQ+", 
             "4. Gender",
             "3. Immigrants", 
             "2. Religion", 
             "1. Race Ethnicity Caste")
)

p_summary <- ggplot(summary_df,
                    aes(x = Prejudice, y = beta, color = Prejudice)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = prejudice_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by Prejudice") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_prejudice.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per prejudice category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- prejudice_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(Prejudice == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_prejudice.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with prejudice #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_prejudice <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Prejudice,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_prejudice <- coef_test(
  fit_prejudice,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_prejudice, rob_prejudice) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_prejudice.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
################################# Intervention #################################
################################################################################ 


##### Forest plot by intervention #####

### Settings

table(all_measures$Contact_type)

intervention_levels <- c("1. Discussions", 
                         "2. Cooperative games", 
                         "3. Sports", 
                         "4. Classmates", 
                         "5. Neighbors", 
                         "6. Roommates",
                         "7. Army",
                         "8. Others")

intervention_colors <- 
  c("1. Discussions" = rgb(0, 18, 25, maxColorValue = 255), 
    "2. Cooperative games" = rgb(0, 95, 115, maxColorValue = 255), 
    "3. Sports" = rgb(10, 147, 150, maxColorValue = 255), 
    "4. Classmates" = rgb(148, 210, 189, maxColorValue = 255), 
    "5. Neighbors" = rgb(233, 216, 166, maxColorValue = 255), 
    "6. Roommates" = rgb(238, 155, 0, maxColorValue = 255),
    "7. Army" = rgb(202, 103, 2, maxColorValue = 255),
    "8. Others" = rgb(187, 62, 3, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_intervention <- function(data, intervention_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(Contact_type == intervention_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = intervention_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = intervention_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = intervention_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = intervention_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(intervention_levels, function(y) {
  make_forest_by_intervention(all_measures, y)
})
names(results_raw) <- intervention_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_intervention(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  Contact_type = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$Contact_type <- factor(
  summary_df$Contact_type,
  levels = c("8. Others", 
             "7. Army",
             "6. Roommates",
             "5. Neighbors", 
             "4. Classmates", 
             "3. Sports", 
             "2. Cooperative games", 
             "1. Discussions")
)

p_summary <- ggplot(summary_df,
                    aes(x = Contact_type, y = beta, color = Contact_type)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = intervention_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by Intervention") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_intervention.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per intervention category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- intervention_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(Contact_type == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_intervention.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with intervention #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_intervention <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Contact_type,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_intervention <- coef_test(
  fit_intervention,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_intervention, rob_intervention) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_intervention.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)




################################################################################ 
################################## Encounters ##################################
################################################################################ 


##### Forest plot by nb of encounters #####

### Settings

table(all_measures$Nb_encounters)

encounters_levels <- c("1. 1", 
                       "2. 2-10", 
                       "3. 10+")

encounters_colors <- 
  c("1. 1" = rgb(0, 48, 73, maxColorValue = 255), 
    "2. 2-10" = rgb(247, 127, 0, maxColorValue = 255), 
    "3. 10+" = rgb(234, 226, 183, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_encounters <- function(data, encounters_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(Nb_encounters == encounters_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = encounters_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = encounters_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = encounters_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = encounters_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(encounters_levels, function(y) {
  make_forest_by_encounters(all_measures, y)
})
names(results_raw) <- encounters_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_encounters(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  Nb_encounters = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$Nb_encounters <- factor(
  summary_df$Nb_encounters,
  levels = c("3. 10+", 
             "2. 2-10", 
             "1. 1")
)

p_summary <- ggplot(summary_df,
                    aes(x = Nb_encounters, y = beta, color = Nb_encounters)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = encounters_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by number of encounters") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_encounters.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per encounters category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- encounters_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(Nb_encounters == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_encounters.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with encounters #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_encounters <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Nb_encounters,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_encounters <- coef_test(
  fit_encounters,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_encounters, rob_encounters) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_encounters.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)




################################################################################ 
################################### Duration ###################################
################################################################################ 


##### Forest plot by duration #####

### Settings

table(all_measures$Duration)
table(all_measures$duration)

duration_levels <- c("1", 
                     "2-30", 
                     "31+")

duration_colors <- 
  c("1" = rgb(0, 48, 73, maxColorValue = 255), 
    "2-30" = rgb(247, 127, 0, maxColorValue = 255), 
    "31+" = rgb(234, 226, 183, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_duration <- function(data, duration_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(duration == duration_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = duration_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = duration_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = duration_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = duration_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(duration_levels, function(y) {
  make_forest_by_duration(all_measures, y)
})
names(results_raw) <- duration_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_duration(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  duration = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$duration <- factor(
  summary_df$duration,
  levels = c("31+", 
             "2-30", 
             "1")
)

p_summary <- ggplot(summary_df,
                    aes(x = duration, y = beta, color = duration)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = duration_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by duration of contact") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_duration.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per duration category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- duration_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(duration == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_duration.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with duration #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_duration <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Duration + I(Duration^2),
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_duration <- coef_test(
  fit_duration,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_duration, rob_duration) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_duration.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)



################################################################################ 
################################# Outcome type #################################
################################################################################ 


##### Forest plot by outcome type #####

### Settings

table(all_measures$type_outcome)

type_outcome_levels <- c("Survey measure", 
                         "Experimental Game", 
                         "Action (in the field)",
                         "IAT")

type_outcome_colors <- 
  c("Survey measure" = rgb(0, 48, 73, maxColorValue = 255), 
    "Experimental Game" = rgb(247, 127, 0, maxColorValue = 255), 
    "Action (in the field)" = rgb(234, 226, 183, maxColorValue = 255), 
    "IAT" = rgb(152, 193, 217, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_type_outcome <- function(data, type_outcome_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(type_outcome == type_outcome_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = type_outcome_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = type_outcome_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = type_outcome_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = type_outcome_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(type_outcome_levels, function(y) {
  make_forest_by_type_outcome(all_measures, y)
})
names(results_raw) <- type_outcome_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_type_outcome(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  type_outcome = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$type_outcome <- factor(
  summary_df$type_outcome,
  levels = c("IAT", 
             "Action (in the field)", 
             "Experimental Game", 
             "Survey measure")
)


p_summary <- ggplot(summary_df,
                    aes(x = type_outcome, y = beta, color = type_outcome)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = type_outcome_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by type of outcome") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_outcome.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per type_outcome category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- type_outcome_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(type_outcome == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_outcome.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with type_outcome #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
dat$type_outcome <- relevel(
  factor(dat$type_outcome),
  ref = "Survey measure"
)

fit_type_outcome <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ type_outcome,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_type_outcome <- coef_test(
  fit_type_outcome,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_type_outcome, rob_type_outcome) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_outcome.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
################################ Entire outgroup ###############################
################################################################################ 


##### Forest plot by whether the outcome is for the entire outgroup #####

### Settings

table(all_measures$allgroup)

allgroup_levels <- c("Entire outgroup", 
                     "Specifically met")

allgroup_colors <- 
  c("Entire outgroup" = rgb(249, 65, 68, maxColorValue = 255), 
    "Specifically met" = rgb(54, 117, 144, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_allgroup <- function(data, allgroup_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(allgroup == allgroup_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = allgroup_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = allgroup_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = allgroup_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = allgroup_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(allgroup_levels, function(y) {
  make_forest_by_allgroup(all_measures, y)
})
names(results_raw) <- allgroup_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_allgroup(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  allgroup = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$allgroup <- factor(
  summary_df$allgroup,
  levels = c("Specifically met", 
             "Entire outgroup")
)


p_summary <- ggplot(summary_df,
                    aes(x = allgroup, y = beta, color = allgroup)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = allgroup_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by outcome destination") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_allgroup.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per allgroup category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- allgroup_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(allgroup == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_allgroup.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with allgroup #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
dat$allgroup <- relevel(
  factor(dat$allgroup),
  ref = "Specifically met"
)

fit_allgroup <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ allgroup,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_allgroup <- coef_test(
  fit_allgroup,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_allgroup, rob_allgroup) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_allgroup.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


################################################################################ 
############################## Time since contact ##############################
################################################################################ 


##### Forest plot by time since contact #####

### Settings

table(all_measures$Time_since_contact)
table(all_measures$time)

time_levels <- c("0", 
                 "1-30", 
                 "31+")

time_colors <- 
  c("0" = rgb(0, 48, 73, maxColorValue = 255), 
    "1-30" = rgb(247, 127, 0, maxColorValue = 255), 
    "31+" = rgb(234, 226, 183, maxColorValue = 255))

row_height_unit <- 0.2
summary_height  <- 1.5


### Function

make_forest_by_time <- function(data, time_value, global_ylim = NULL) {
  
  dat <- data %>%
    filter(time == time_value) %>%
    mutate(
      e  = effect,
      se = se_effect,
      v  = se_effect^2,
      paper_id = as.integer(factor(Paper_id))
    ) %>%
    filter(!is.na(e) & !is.na(se)) %>%
    group_by(paper_id) %>%
    mutate(n_obs = n()) %>%
    ungroup() %>%
    mutate(obs_id = row_number())
  
  if (nrow(dat) == 0) return(NULL)
  
  ## Multilevel model
  
  fit <- rma.mv(
    yi     = e,
    V      = v,
    random = list(~ 1 | paper_id/obs_id),
    data   = dat,
    method = "REML"
  )
  
  vcov_mat <- vcovCR(fit, cluster = dat$paper_id, type = "CR2")
  ci       <- conf_int(fit, vcov = vcov_mat, test = "Satterthwaite")
  
  overall_beta <- as.numeric(fit$beta)
  
  ## Forest data
  
  dat_forest <- dat %>%
    select(paper_id, paper_key, n_obs) %>%
    distinct() %>%
    left_join(
      dat %>%
        filter(n_obs == 1) %>%
        mutate(
          ci_l   = e - 1.96 * se,
          ci_u   = e + 1.96 * se,
          s_ci_l = e - 1.96 * se,
          s_ci_u = e + 1.96 * se
        ) %>%
        select(paper_id, beta = e, ci_l, ci_u, s_ci_l, s_ci_u),
      by = "paper_id"
    )
  
  idx <- unique(dat_forest$paper_id)
  
  for (i in seq_len(nrow(dat_forest))) {
    if (dat_forest$n_obs[i] > 1) {
      
      temp_dat <- dat %>% filter(paper_id == idx[i])
      
      temp_fit <- rma(
        yi = e,
        vi = v,
        data = temp_dat,
        method = "REML"
      )
      
      dat_forest$beta[i]  <- as.numeric(temp_fit$beta)
      dat_forest$ci_l[i]  <- as.numeric(temp_fit$ci.lb)
      dat_forest$ci_u[i]  <- as.numeric(temp_fit$ci.ub)
      
      b <- as.numeric(temp_fit$beta)
      dat_forest$s_ci_l[i] <- b - 1.96 * median(sqrt(temp_dat$v))
      dat_forest$s_ci_u[i] <- b + 1.96 * median(sqrt(temp_dat$v))
    }
  }
  
  dat_forest <- dat_forest %>%
    mutate(across(c(beta, ci_l, ci_u, s_ci_l, s_ci_u), as.numeric)) %>%
    arrange(-beta) %>%
    mutate(i = row_number())
  
  ## Axis limits
  
  local_range <- c(
    min(c(dat_forest$ci_l, dat_forest$s_ci_l), na.rm = TRUE),
    max(c(dat_forest$ci_u, dat_forest$s_ci_u), na.rm = TRUE)
  )
  
  if (!is.null(global_ylim)) {
    y_range <- global_ylim
  } else {
    y_range <- local_range
  }
  
  ## Plot
  
  p <- ggplot(dat_forest,
              aes(y = i, x = beta)) +
    geom_vline(xintercept = 0, color = "grey70") +
    geom_vline(xintercept = overall_beta, linetype = "dashed") +
    geom_point(aes(color = time_value)) +
    geom_errorbarh(aes(xmin = ci_l, xmax = ci_u, color = time_value),
                   height = 0.3) +
    geom_errorbarh(aes(xmin = s_ci_l, xmax = s_ci_u, color = time_value),
                   height = 0.6, alpha = 0.3) +
    scale_y_continuous(
      breaks = dat_forest$i,
      labels = dat_forest$paper_key,
      limits = c(0.5, max(dat_forest$i) + 0.5),
      expand = c(0, 0)
    ) +
    scale_x_continuous(limits = global_ylim) +
    scale_color_manual(values = time_colors) +
    labs(x = NULL, y = NULL) +   # remove x-axis title
    theme_classic() +
    theme(
      axis.text.x = element_blank(),   # remove x-axis labels
      axis.ticks.x = element_blank(),  # remove x-axis ticks
      axis.line.x = element_blank(),   # remove x-axis line
      axis.text.y = element_text(size = 9),
      legend.position = "none",
      plot.margin = margin(2, 10, 2, 10)
    )
  
  return(list(
    plot  = p,
    beta  = ci$beta,
    ci_l  = ci$CI_L,
    ci_u  = ci$CI_U,
    range = local_range,
    nrows = nrow(dat_forest)
  ))
}


## First pass (to compute global axis)

results_raw <- lapply(time_levels, function(y) {
  make_forest_by_time(all_measures, y)
})
names(results_raw) <- time_levels


## Remove NULL categories

results_raw <- results_raw[!sapply(results_raw, is.null)]

all_ranges <- do.call(rbind,
                      lapply(results_raw, function(x) x$range))


## Get the optimal axes

global_ylim <- c(
  min(all_ranges[,1], na.rm = TRUE),
  max(all_ranges[,2], na.rm = TRUE)
)


## Second pass (identical axis)

results <- lapply(names(results_raw), function(y) {
  make_forest_by_time(all_measures, y, global_ylim)
})
names(results) <- names(results_raw)


## Summary panel

summary_df <- data.frame(
  time = names(results),
  beta  = sapply(results, function(x) x$beta),
  ci_l  = sapply(results, function(x) x$ci_l),
  ci_u  = sapply(results, function(x) x$ci_u)
)

summary_df$time <- factor(
  summary_df$time,
  levels = c("31+", 
             "1-30", 
             "0")
)

p_summary <- ggplot(summary_df,
                    aes(x = time, y = beta, color = time)) +
  geom_hline(yintercept = 0, color = "grey70") +
  geom_point(shape = 15, size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u),
                width = 0.4) +
  coord_flip(ylim = global_ylim) +
  scale_color_manual(values = time_colors) +
  ylab("Effect size") +
  xlab("") +
  ggtitle("Summary Effects by time since contact") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 10,
      face = "bold",
      margin = margin(b = 4)
    )
  )


## Final figure

forest_plots <- lapply(results, function(x) x$plot)

row_counts <- sapply(results, function(x) x$nrows)

# Use row_height_unit to scale panel sizes
rel_heights <- c(row_counts * row_height_unit, summary_height)


fig_final <- plot_grid(
  plotlist = c(forest_plots, list(p_summary)),
  ncol = 1,
  rel_heights = rel_heights,
  align = "v",
  axis = "lr"
)

fig_final

# Get the proper size for graph
total_height <- sum(pmax(row_counts * row_height_unit, 4)) + summary_height

ggsave(
  filename = file.path(pathfigures, "effect_time.pdf"),
  plot = fig_final,
  width = 10,
  height = total_height,
  units = "in"
)


##### Table with meta-analytic means per time category #####

## Prepare data

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()


## Multi-level model per category

categories <- time_levels

results_list <- list()

for(cat in categories){
  
  dat_sub <- dat %>% filter(time == cat)
  
  if(nrow(dat_sub) == 0) next
  
  fit_ml <- re_meta(dat_sub, cluster = "paper_id", multi.level = TRUE)
  
  tmp <- summary_re_meta(fit_ml) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame()
  
  colnames(tmp) <- cat
  tmp$Statistic <- rownames(tmp)
  
  results_list[[cat]] <- tmp
}


## Merge tables

meta_table <- reduce(results_list, full_join, by = "Statistic")

meta_table <- meta_table %>%
  select(Statistic, all_of(categories))

meta_table <- meta_table %>%
  mutate(
    Statistic = recode(
      Statistic,
      est      = "Coef.",
      se       = "SE",
      ci       = "95% CI",
      tau2_w   = "Tau2 (within)",
      tau2_b   = "Tau2 (between)",
      i2_w     = "I2 (within)",
      i2_b     = "I2 (between)",
      cluster  = "Num. clusters",
      n        = "Num. obs"
    )
  )


## Export table 

print(
  xtable(meta_table),
  file = file.path(paste(pathtables, "meta_table_by_time.tex", sep = "//")),
  comment = FALSE, 
  include.rownames = FALSE, 
  sanitize.text.function = identity
)


##### Meta-analytic regression with time #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_time <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Time_since_contact + I(Time_since_contact^2),
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_time <- coef_test(
  fit_time,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_time, rob_time) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_time.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


##### Regression with paper fixed effects to only measure the influence of time #####

lm1 <- 
  lm(data = all_measures, 
     formula = effect ~ Time_since_contact + factor(Paper_id))
summary(lm1)


################################################################################ 
############################## Allport conditions ##############################
################################################################################ 


##### Meta-analytic regression experiment values #####

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ mean_common_2 + mean_equal_2 + mean_positive_2 +
    mean_support_2 + mean_friendship_2,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_conditions_exp.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


##### Meta-analytic regression ChatGPT #####

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ common_gpt + equal_gpt + positive_gpt +
    support_gpt + friendship_gpt,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_conditions_gpt.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)


##### Meta-analytic regression own evaluation #####

## Fitting model
fit_year <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ common_GJ + equal_GJ + positive_GJ +
    support_GJ + friendship_GJ,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_year <- coef_test(
  fit_year,
  vcov    = "CR2",
  cluster = dat$paper_id
)

summary_mra_out <- summary_mra(fit_year, rob_year) %>%
  mutate(var = ifelse(grepl("intrcpt", var), "Constant", var))

# Export table
print(
  xtable(summary_mra_out),
  file = file.path(paste(pathtables, "meta_regression_conditions_gj.tex", sep = "//")),
  comment = FALSE,
  include.rownames = FALSE
)



################################################################################ 
################################ Clearing memory ###############################
################################################################################ 

rm(
  all_measures, indiv, 
  all_ranges, dat, dat_sub, fig_final,
  fit_ml,
  fit_age, fit_allgroup, fit_duration, fit_encounters, fit_intervention, 
  fit_population, fit_prejudice, fit_region, fit_time, fit_type_outcome, fit_year,
  forest_plots, jp_col, pt_bright, pt_hc, pt_mc, uo_blue,
  meta_table, p_summary, 
  results, results_list, results_raw, 
  rob_age, rob_allgroup, rob_duration, rob_encounters, rob_intervention, 
  rob_population, rob_prejudice, rob_region, rob_time, rob_type_outcome, rob_year,
  summary_df, summary_mra_out, tmp,
  age_colors, age_levels, 
  allgroup_colors, allgroup_levels, 
  duration_colors, duration_levels, 
  encounters_colors, encounters_levels, 
  intervention_colors, intervention_levels, 
  population_colors, population_levels, 
  prejudice_colors, prejudice_levels, 
  published_colors, published_levels, published_labels,
  region_colors, region_levels, 
  sample_colors, sample_levels,
  time_colors, time_levels, 
  type_outcome_colors, type_outcome_levels, 
  year_colors, year_levels, 
  cat, categories, global_xlim, global_ylim, rel_heights, row_counts, 
  row_height_unit, summary_height, total_height, 
  make_forest_by_age, make_forest_by_allgroup, make_forest_by_duration,
  make_forest_by_encounters, make_forest_by_intervention, 
  make_forest_by_population, make_forest_by_prejudice, make_forest_by_published,
  make_forest_by_region, make_forest_by_time, make_forest_by_type_outcome, 
  make_forest_by_year, make_forest_by_sample
)


################################################################################ 
#################################### THE END ###################################
################################################################################ 
